# mnożenie macierzy - wersja naiwna
function naive_multiplication(A,B)
C = zeros(Float64, size(A,1), size(B,2))
for i=1:size(A,1)
for j=1:size(B,2)
for k=1:size(A,2)
C[i,j] = C[i,j] + A[i,k]*B[k,j]
end
end
end
C
end
naive_multiplication (generic function with 1 method)
# poprawiona funkcja korzytająca z powyższego oraz z faktu, że
# można zmieniać kolejność operacji dodawania (a co za tym idzie kolejnosc petli).
function better_multiplication(A, B)
C = zeros(Float64, size(A,1), size(B,2))
for j=1:size(B,2)
for k=1:size(A,2)
for i=1:size(A,1)
C[i,j] = C[i,j] + A[i,k]*B[k,j]
end
end
end
C
end
better_multiplication (generic function with 1 method)
#generowanie macierzy n*n
#using Random
#using DataFrames
function generate_matrix(n)
result = zeros(Float64, n, n)
for i = 1:n
for j = 1:n
result[i,j] = rand()
end
end
result
end
function test_time(n)
time = zeros(Float64, 10*n, 4)
for i = 1:n
matrix = generate_matrix(i)
for j = 1:10
time[(i-1)*10+j, 1] = i
time[(i-1)*10+j, 2] = @elapsed naive_multiplication(matrix, matrix)
time[(i-1)*10+j, 3] = @elapsed better_multiplication(matrix, matrix)
time[(i-1)*10+j, 4] = @elapsed matrix*matrix
end
end
time
end
result = test_time(500)
df = DataFrame()
df.wielkość_macierzy = result[:,1]
df.naiwne_mnożenie = result[:,2]
df.ulepszone_mnożenie = result[:,3]
df.mnożenie_BLAS = result[:,4]
df
| wielkość_macierzy | naiwne_mnożenie | ulepszone_mnożenie | mnożenie_BLAS | |
|---|---|---|---|---|
| Float64 | Float64 | Float64 | Float64 | |
| 1 | 1.0 | 4.01e-7 | 5.99e-7 | 3.199e-6 |
| 2 | 1.0 | 1.01e-7 | 3.99e-7 | 2.0e-7 |
| 3 | 1.0 | 0.0 | 0.0 | 1.01e-7 |
| 4 | 1.0 | 0.0 | 1.0e-7 | 2.0e-7 |
| 5 | 1.0 | 9.9e-8 | 1.01e-7 | 2.0e-7 |
| 6 | 1.0 | 1.0e-7 | 9.9e-8 | 2.0e-7 |
| 7 | 1.0 | 1.01e-7 | 0.0 | 2.0e-7 |
| 8 | 1.0 | 0.0 | 1.01e-7 | 2.0e-7 |
| 9 | 1.0 | 1.0e-7 | 9.9e-8 | 2.0e-7 |
| 10 | 1.0 | 1.01e-7 | 1.0e-7 | 2.0e-7 |
| 11 | 2.0 | 2.0e-7 | 1.0e-7 | 2.0e-7 |
| 12 | 2.0 | 1.01e-7 | 1.0e-7 | 1.0e-7 |
| 13 | 2.0 | 1.0e-7 | 9.9e-8 | 1.01e-7 |
| 14 | 2.0 | 9.9e-8 | 1.01e-7 | 1.0e-7 |
| 15 | 2.0 | 9.9e-8 | 1.01e-7 | 9.9e-8 |
| 16 | 2.0 | 1.01e-7 | 1.0e-7 | 1.0e-7 |
| 17 | 2.0 | 1.0e-7 | 9.9e-8 | 1.01e-7 |
| 18 | 2.0 | 9.9e-8 | 1.01e-7 | 1.0e-7 |
| 19 | 2.0 | 9.9e-8 | 1.01e-7 | 9.9e-8 |
| 20 | 2.0 | 1.01e-7 | 9.9e-8 | 0.0 |
| 21 | 3.0 | 2.0e-7 | 2.0e-7 | 1.99e-7 |
| 22 | 3.0 | 2.0e-7 | 1.01e-7 | 1.0e-7 |
| 23 | 3.0 | 2.0e-7 | 9.9e-8 | 1.01e-7 |
| 24 | 3.0 | 1.0e-7 | 2.0e-7 | 9.9e-8 |
| 25 | 3.0 | 1.01e-7 | 9.9e-8 | 1.01e-7 |
| 26 | 3.0 | 1.99e-7 | 1.01e-7 | 0.0 |
| 27 | 3.0 | 1.01e-7 | 1.0e-7 | 1.0e-7 |
| 28 | 3.0 | 9.9e-8 | 1.01e-7 | 0.0 |
| 29 | 3.0 | 0.0 | 9.9e-8 | 1.01e-7 |
| 30 | 3.0 | 9.9e-8 | 3.01e-7 | 1.99e-7 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
using Statistics
gd = groupby(df, :wielkość_macierzy)
avg_sd_df = combine(gd, :naiwne_mnożenie => mean,
:ulepszone_mnożenie => mean,
:mnożenie_BLAS => mean,
:naiwne_mnożenie => std,
:ulepszone_mnożenie => std,
:mnożenie_BLAS => std)
avg_sd_df
| wielkość_macierzy | naiwne_mnożenie_mean | ulepszone_mnożenie_mean | mnożenie_BLAS_mean | |
|---|---|---|---|---|
| Float64 | Float64 | Float64 | Float64 | |
| 1 | 1.0 | 1.003e-7 | 1.598e-7 | 4.9e-7 |
| 2 | 2.0 | 1.099e-7 | 1.001e-7 | 1.0e-7 |
| 3 | 3.0 | 1.299e-7 | 1.401e-7 | 1.0e-7 |
| 4 | 4.0 | 1.601e-7 | 1.599e-7 | 2.198e-7 |
| 5 | 5.0 | 2.6e-7 | 2.7e-7 | 2.402e-7 |
| 6 | 6.0 | 3.801e-7 | 4.1e-7 | 2.499e-7 |
| 7 | 7.0 | 5.898e-7 | 6.102e-7 | 3.597e-7 |
| 8 | 8.0 | 7.801e-7 | 8.397e-7 | 4.101e-7 |
| 9 | 9.0 | 1.16e-6 | 1.1798e-6 | 4.097e-7 |
| 10 | 10.0 | 1.5298e-6 | 1.5398e-6 | 4.501e-7 |
| 11 | 11.0 | 2.11e-6 | 2.0899e-6 | 4.9e-7 |
| 12 | 12.0 | 2.7101e-6 | 2.5896e-6 | 4.798e-7 |
| 13 | 13.0 | 3.4598e-6 | 3.2e-6 | 6.001e-7 |
| 14 | 14.0 | 4.4098e-6 | 3.9699e-6 | 7.3e-7 |
| 15 | 15.0 | 5.41e-6 | 4.8403e-6 | 8.396e-7 |
| 16 | 16.0 | 6.5696e-6 | 5.9502e-6 | 8.297e-7 |
| 17 | 17.0 | 7.8198e-6 | 6.9305e-6 | 9.102e-7 |
| 18 | 18.0 | 9.6403e-6 | 8.1497e-6 | 1.1e-6 |
| 19 | 19.0 | 1.107e-5 | 9.6304e-6 | 1.2799e-6 |
| 20 | 20.0 | 1.32401e-5 | 1.11897e-5 | 1.2402e-6 |
| 21 | 21.0 | 1.50698e-5 | 1.28196e-5 | 1.3906e-6 |
| 22 | 22.0 | 1.75997e-5 | 1.43904e-5 | 1.39e-6 |
| 23 | 23.0 | 1.98098e-5 | 1.65301e-5 | 1.8901e-6 |
| 24 | 24.0 | 2.30797e-5 | 1.87603e-5 | 1.8999e-6 |
| 25 | 25.0 | 2.57298e-5 | 2.12599e-5 | 2.1099e-6 |
| 26 | 26.0 | 2.94701e-5 | 2.36699e-5 | 2.2198e-6 |
| 27 | 27.0 | 3.269e-5 | 2.63995e-5 | 2.7702e-6 |
| 28 | 28.0 | 3.70295e-5 | 2.94203e-5 | 2.4501e-6 |
| 29 | 29.0 | 4.22e-5 | 3.24399e-5 | 3.1898e-6 |
| 30 | 30.0 | 4.617e-5 | 3.61397e-5 | 3.1002e-6 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
using Plots
scatter([avg_sd_df.naiwne_mnożenie_mean, avg_sd_df.ulepszone_mnożenie_mean, avg_sd_df.mnożenie_BLAS_mean],label=["mnożenie naiwne" "ulepszone mnożenie naiwne" "mnożenie BLAS"],title="porównanie czasowe różnych metod mnożenia macierzy", xlabel="wielkość macierzy", ylabel="czas[s]", yerror=[avg_sd_df.naiwne_mnożenie_std avg_sd_df.ulepszone_mnożenie_std avg_sd_df.mnożenie_BLAS_std])
kod programu napisany w języku C
#include <stdio.h>
#include <stdlib.h>
#include <gsl/gsl_blas.h>
#include <time.h>
double** zeros(int x1, int y2){
double** result = malloc(sizeof(double*)*x1);
for(int i = 0; i < x1; i++){
result[i] = malloc(sizeof(double)*y2);
for(int j = 0; j < y2; j++){
result[i][j] = 0;
}
}
return result;
}
double* zeros1(int x){
double* result = malloc(sizeof(double)*x);
for(int i = 0; i < x; i++){
result[i] = 0;
}
return result;
}
double* generate_matrix1(int x){
double* result = zeros1(x);
for(int i = 0; i < x; i++){
result[i] = rand()% 100000;
}
return result;
}
double** generate_matrix(int x1, int y2){
double** result = zeros(x1, y2);
for(int i = 0; i < x1; i++){
for(int j = 0; j < y2; j++){
result[i][j] = rand() % 100000;
}
}
return result;
}
void dealloc_memory(double** matrix, int x1, int y2){
for(int i = 0; i < x1; i++){
free(matrix[i]);
}
free(matrix);
}
double** naive_multiplication(double** matrix1, double** matrix2, int x1, int y1, int x2, int y2){
double** result = zeros(x1,y2);
for(int i = 0; i < x1; i++){
for(int j = 0; j < y2; j++){
for(int k = 0; k < y1; k++){
result[i][j] = result[i][j] + matrix1[i][k]*matrix2[k][j];
}
}
}
return result;
}
double** better_multiplication(double** matrix1, double** matrix2, int x1, int y1, int x2, int y2){
double** result = zeros(x1,y2);
for(int i = 0; i < x1; i++){
for(int k = 0; k < y1; k++){
for(int j = 0; j < y2; j++){
result[i][j] = result[i][j] + matrix1[i][k]*matrix2[k][j];
}
}
}
return result;
}
int main(){
srand (time ( NULL));
int n = 300;
FILE* report = fopen("raportAcceleration.csv","w");
for(int i = 1; i < n; i++){
double** matrix2 = generate_matrix(i, i);
double* matrix1 = generate_matrix1(i*i);
for(int j = 0; j < 10; j++){
double* resultGSL = zeros1(i*i);
gsl_matrix_view A = gsl_matrix_view_array(matrix1, i, i);
gsl_matrix_view B = gsl_matrix_view_array(matrix1, i, i);
gsl_matrix_view C = gsl_matrix_view_array(resultGSL, i, i);
clock_t start = clock();
double** result1 = naive_multiplication(matrix2, matrix2,i,i,i,i);
clock_t end = clock();
double secondsNaive = (double)(end - start) / CLOCKS_PER_SEC;
start = clock();
double** result2 = better_multiplication(matrix2, matrix2,i,i,i,i);
end = clock();
double secondsBetter = (double)(end - start) / CLOCKS_PER_SEC;
start = clock();
gsl_blas_dgemm (CblasNoTrans, CblasNoTrans,
1.0, &A.matrix, &B.matrix,
0.0, &C.matrix);
end = clock();
double secondsGSL = (double)(end - start) / CLOCKS_PER_SEC;
fprintf(report, "%d:%lf:%lf:%lf\n",i, secondsNaive, secondsBetter, secondsGSL);
free(resultGSL);
dealloc_memory(result1, i, i);
dealloc_memory(result2, i, i);
}
dealloc_memory(matrix2,i,i);
free(matrix1);
}
fclose(report);
return 0;
}
po dwukrotnym uruchomieniu z różnymi opcjami kompilacji, uzyskuję dwa pliki .csv z których zostaną utworzone ramki danych
#using CSV
#konwencja: Acc - Acceleration; NoAcc - No Acceleration
dfAcc = CSV.read("raportAcceleration.csv", delim=":", DataFrame)
dfNoAcc = CSV.read("raportNoAcceleration.csv", delim=":", DataFrame)
| wielkość_macierzy | mnożenie_naiwne | mnożenie_ulepszone | mnożenie_BLAS | |
|---|---|---|---|---|
| Int64 | Float64 | Float64 | Float64 | |
| 1 | 1 | 1.0e-6 | 0.0 | 2.0e-6 |
| 2 | 1 | 0.0 | 0.0 | 0.0 |
| 3 | 1 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 4 | 1 | 0.0 | 0.0 | 0.0 |
| 5 | 1 | 1.0e-6 | 0.0 | 0.0 |
| 6 | 1 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 7 | 1 | 0.0 | 1.0e-6 | 1.0e-6 |
| 8 | 1 | 0.0 | 0.0 | 0.0 |
| 9 | 1 | 0.0 | 0.0 | 0.0 |
| 10 | 1 | 1.0e-6 | 1.0e-6 | 0.0 |
| 11 | 2 | 0.0 | 1.0e-6 | 1.0e-6 |
| 12 | 2 | 0.0 | 0.0 | 0.0 |
| 13 | 2 | 0.0 | 0.0 | 0.0 |
| 14 | 2 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 15 | 2 | 0.0 | 0.0 | 1.0e-6 |
| 16 | 2 | 0.0 | 0.0 | 0.0 |
| 17 | 2 | 1.0e-6 | 1.0e-6 | 0.0 |
| 18 | 2 | 0.0 | 1.0e-6 | 1.0e-6 |
| 19 | 2 | 0.0 | 0.0 | 0.0 |
| 20 | 2 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 21 | 3 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 22 | 3 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 23 | 3 | 1.0e-6 | 1.0e-6 | 1.5e-5 |
| 24 | 3 | 0.0 | 0.0 | 0.0 |
| 25 | 3 | 1.0e-6 | 0.0 | 0.0 |
| 26 | 3 | 1.0e-6 | 1.0e-5 | 1.0e-6 |
| 27 | 3 | 0.0 | 0.0 | 0.0 |
| 28 | 3 | 1.0e-6 | 1.0e-6 | 1.0e-6 |
| 29 | 3 | 1.0e-6 | 0.0 | 0.0 |
| 30 | 3 | 0.0 | 1.0e-6 | 8.0e-6 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
#grupowanie
gdAcc = groupby(dfAcc, :wielkość_macierzy)
dfAcc_combined = combine(gdAcc, :mnożenie_naiwne => mean,
:mnożenie_ulepszone => mean,
:mnożenie_BLAS => mean,
:mnożenie_naiwne => std,
:mnożenie_ulepszone => std,
:mnożenie_BLAS => std)
gdNoAcc = groupby(dfNoAcc, :wielkość_macierzy)
dfNoAcc_combined = combine(gdNoAcc, :mnożenie_naiwne => mean,
:mnożenie_ulepszone => mean,
:mnożenie_BLAS => mean,
:mnożenie_naiwne => std,
:mnożenie_ulepszone => std,
:mnożenie_BLAS => std)
| wielkość_macierzy | mnożenie_naiwne_mean | mnożenie_ulepszone_mean | mnożenie_BLAS_mean | |
|---|---|---|---|---|
| Int64 | Float64 | Float64 | Float64 | |
| 1 | 1 | 5.0e-7 | 4.0e-7 | 5.0e-7 |
| 2 | 2 | 3.0e-7 | 5.0e-7 | 5.0e-7 |
| 3 | 3 | 7.0e-7 | 1.5e-6 | 2.7e-6 |
| 4 | 4 | 6.0e-7 | 6.0e-7 | 4.0e-7 |
| 5 | 5 | 8.0e-7 | 7.0e-7 | 1.4e-6 |
| 6 | 6 | 1.0e-6 | 2.0e-7 | 2.0e-7 |
| 7 | 7 | 9.0e-7 | 8.0e-7 | 6.0e-7 |
| 8 | 8 | 1.9e-6 | 9.0e-7 | 8.0e-7 |
| 9 | 9 | 1.5e-6 | 1.0e-6 | 1.0e-6 |
| 10 | 10 | 1.8e-6 | 2.1e-6 | 1.2e-6 |
| 11 | 11 | 2.4e-6 | 1.6e-6 | 1.3e-6 |
| 12 | 12 | 3.3e-6 | 2.1e-6 | 1.4e-6 |
| 13 | 13 | 3.8e-6 | 2.6e-6 | 1.8e-6 |
| 14 | 14 | 4.9e-6 | 2.8e-6 | 2.2e-6 |
| 15 | 15 | 5.6e-6 | 4.3e-6 | 2.5e-6 |
| 16 | 16 | 6.8e-6 | 4.2e-6 | 3.0e-6 |
| 17 | 17 | 8.0e-6 | 4.5e-6 | 3.4e-6 |
| 18 | 18 | 9.4e-6 | 5.4e-6 | 3.8e-6 |
| 19 | 19 | 1.12e-5 | 6.4e-6 | 4.3e-6 |
| 20 | 20 | 1.33e-5 | 8.7e-6 | 7.5e-6 |
| 21 | 21 | 1.54e-5 | 8.2e-6 | 5.3e-6 |
| 22 | 22 | 1.99e-5 | 9.5e-6 | 6.3e-6 |
| 23 | 23 | 2.04e-5 | 1.04e-5 | 6.9e-6 |
| 24 | 24 | 2.47e-5 | 1.55e-5 | 7.9e-6 |
| 25 | 25 | 2.71e-5 | 1.56e-5 | 1.02e-5 |
| 26 | 26 | 3.26e-5 | 1.66e-5 | 9.7e-6 |
| 27 | 27 | 3.66e-5 | 1.81e-5 | 1.1e-5 |
| 28 | 28 | 4.11e-5 | 1.88e-5 | 1.18e-5 |
| 29 | 29 | 4.3e-5 | 2.05e-5 | 1.28e-5 |
| 30 | 30 | 4.97e-5 | 2.22e-5 | 1.4e-5 |
| ⋮ | ⋮ | ⋮ | ⋮ | ⋮ |
scatter([dfAcc_combined.mnożenie_naiwne_mean dfAcc_combined.mnożenie_ulepszone_mean dfAcc_combined.mnożenie_BLAS_mean dfNoAcc_combined.mnożenie_naiwne_mean dfNoAcc_combined.mnożenie_ulepszone_mean dfNoAcc_combined.mnożenie_BLAS_mean],layout=2,label=["mnożenie naiwne" "ulepszone mnożenie naiwne" "mnożenie BLAS"],title=["czas mnożenia macierzy z opcją -O3" "czas mnożenia macierzy z opcją -O"], size=(1000,600), xlabel="wielkość macierzy", ylabel="czas[s]", yerror=[dfAcc_combined.mnożenie_naiwne_std dfAcc_combined.mnożenie_ulepszone_std dfAcc_combined.mnożenie_BLAS_std dfNoAcc_combined.mnożenie_naiwne_std dfNoAcc_combined.mnożenie_ulepszone_std dfNoAcc_combined.mnożenie_BLAS_std])
#using Polynomials
fit_naive = fit(avg_sd_df.wielkość_macierzy, avg_sd_df.naiwne_mnożenie_mean, 3)
fit_better = fit(avg_sd_df.wielkość_macierzy, avg_sd_df.ulepszone_mnożenie_mean, 3)
fit_BLAS = fit(avg_sd_df.wielkość_macierzy, avg_sd_df.mnożenie_BLAS_mean, 3)
scatter([avg_sd_df.naiwne_mnożenie_mean, avg_sd_df.ulepszone_mnożenie_mean, avg_sd_df.mnożenie_BLAS_mean],size=(1000,600),label=["mnożenie naiwne" "ulepszone mnożenie naiwne" "mnożenie BLAS"],title="porównanie czasowe różnych metod mnożenia macierzy", xlabel="wielkość macierzy", ylabel="czas[s]", yerror=[avg_sd_df.naiwne_mnożenie_std avg_sd_df.ulepszone_mnożenie_std avg_sd_df.mnożenie_BLAS_std])
plot!(fit_naive, extrema(avg_sd_df.wielkość_macierzy)..., label="aproksymacja mnożenia naiwnego")
plot!(fit_better, extrema(avg_sd_df.wielkość_macierzy)..., label="aproksymacja mnożenia ulepszonego")
plot!(fit_BLAS, extrema(avg_sd_df.wielkość_macierzy)..., label="aproksymacja mnożenia BLAS")
#demonstracja efektu Runge
using Polynomials
using Plots
n = 10
#przykład interpolacji następującej funkcji:
function test(x)
1/(1+7x^2)
end
#najpierw dla węzłów równoodległych:
xs = -1:(2/n):1
ys = [test(x) for x in xs]
scatter(xs,ys, label="węzły interpolacji")
lagrange = fit(xs,ys)
plot!(lagrange, extrema(xs)...,label="interpolacja równoodległa", title="demonstracja efektu Rungego", xlabel="x",ylabel="f(x)")
plot!(test, extrema(xs)..., label="funkcja testowa")
#aby rozwiązać ten problem, należy użyc do interpolacji
#węzłów będących miejscami zerowymi wielomianu Czebyszewa o numerze n +1
arg = zeros(Int64, n+1)
arg[n+1] = 1
t_n = ChebyshevT(arg)
xs1 = Polynomials.roots(t_n)
ys1 = [test(x) for x in xs1]
lagrange1 = fit(xs1, ys1)
scatter(xs1,ys1, label="węzły interpolacji")
plot!(lagrange1, extrema(xs1)...,label="interpolacja węzłami Czebyszewa", title="niwelowanie efektu Rungego", xlabel="x",ylabel="f(x)")
plot!(test, extrema(xs1)..., label="funkcja testowa")
#przykład z wykładu
#using TaylorSeries
function f(x)
(7 + (1 + x)^(4/3))^(1/3)
end
t = Taylor1(Float64, 5)
f_taylor = f(t)
f_pol = Polynomial(f_taylor.coeffs)
f_pade_tmp = Polynomials.PolyCompat.PadeApproximation.Pade(f_pol, 2, 2)
function f_pade1(x)
f_pade_tmp.p(x)/(f_pade_tmp.q(x))
end
plot(f, xlims=(0,25), ylims=(0,15), label="f(x)")
plot!(f_pol, label="Taylor",xlims=(0,28), ylims=(0,15))
plot!(f_pade1, label="Pade")